## Twitter ## 
# dictionary based sentiment analysis
# Inspect the Lexicoder Sentiment Dictionary: number of keys, then its content.
length(data_dictionary_LSD2015)
data_dictionary_LSD2015

# Raw match counts for the first two dictionary keys (used further down as the
# `negative` and `positive` columns).
corp_sentiment <- dfm_group(mep_dfm) %>%
  dfm_lookup(dictionary = data_dictionary_LSD2015[1:2])
corp_sentiment

# as a percentage
# Features are weighted to within-document proportions before the lookup.
corp_sentiment_prop <- dfm_group(mep_dfm) %>%
  dfm_weight(scheme = "prop") %>%
  dfm_lookup(dictionary = data_dictionary_LSD2015[1:2])
corp_sentiment_prop

sent <- convert(corp_sentiment_prop, to = "data.frame")
# Number the rows from the data itself rather than hard-coding the corpus size
# (previously 1:32044), so the script keeps working when the tweet count changes.
sent$docnum <- seq_len(nrow(sent))


## immigration dictionary (based on Ruedin & Morales 2017)

# Terms that mark a token as immigration-related. Entries containing `*` are
# wildcard patterns (presumably matched with quanteda's default glob matching).
immigration_terms <- c(
  "asylum", "border", "citizen*", "cultur*",
  "deport*", "ethnic*", "foreign*", "halal",
  "hallal", "identity", "immigr*", "integrat*",
  "irregular", "migrant*", "*migration*", "minorit*",
  "multicultur*", "naturalis*", "naturaliz*", "permit",
  "refug*", "religious", "reunion", "temple",
  "unauthorised", "unauthorized", "unity", "evac*",
  "flee*", "airport", "checkpoint", "escap*"
)

# Single-key dictionary: every match is reported under `immigration`.
immidict <- dictionary(list(immigration = immigration_terms))

# Alternative dictionary for robustness checks. The main dictionary above only
# uses the neutral terms from Ruedin & Morales; this one is a one-to-one copy
# of their Yoshikoder dictionary.
#immidict <- dictionary(list(immigration = c("abuse","assimil","asylum",
#                                            "avalanche","border","burqa",
#                                            "christian","citizen","cultur",
#                                            "custom","deport","discriminat",
#                                            "diversity","ethnic","extremis",
#                                            "flood","foreign","fraud","halal",
#                                            "hallal","headscarf","human",
#                                            "identity","illegal","immigr",
#                                            "indigenous","integrat","invasion",
#                                            "irregular","islam","jihad",
#                                            "migrant","migration","minaret",
#                                            "minorit","mosque","multicultur",
#                                            "muslim","nation","native",
#                                            "naturalis","naturaliz","permit",
#                                            "raci","radical","refug",
#                                            "religious","reunion","shari'a",
#                                            "sharia","shariah","shelter",
#                                            "temple","terroris","toleran",
#                                            "tradition","traumatis",
#                                           "traumatiz","unauthorised",
#                                            "unauthorized","unity","veil",
#                                            "xenophob")))


# searching the tokens for matches with the dictionary
toks_dict <- tokens_lookup(meptoks, dictionary = immidict)
print(toks_dict)

# Document-feature matrix of the dictionary matches.
dfm_dict <- dfm(toks_dict)
dfm_dict

# Reuse dfm_dict instead of building the same dfm a second time.
dictdf <- convert(dfm_dict, to = "data.frame")
# Derive the running document number from the data rather than hard-coding the
# corpus size (previously 1:32044).
dictdf$docnum <- seq_len(nrow(dictdf))


# Combine the tweets with their dictionary matches and sentiment shares, then
# derive the net sentiment and a day counter centred on the Taliban takeover.
# Both joins use dplyr's default (all shared column names) as join keys.
c <- alltweets %>%
  full_join(dictdf) %>%
  full_join(sent) %>%
  # net sentiment: positive share minus negative share
  mutate(sentiment = positive - negative) %>%
  # created_at is split at the "T" into a date part and a time part
  tidyr::separate(created_at, c("date", "time"), sep = "T") %>%
  # 18854 is the numeric value of 15 Aug 2021 (Taliban takeover), so that day
  # becomes 0 and all other days are counted relative to it
  mutate(date = as.numeric(as.Date(date)) - 18854)

# creating subsets for plots

## eastern/western Europe

# Country lists behind the East/West split; countries in neither list stay NA.
west_countries <- c(
  "Belgium", "Denmark", "Finland", "France", "Germany", "Ireland", "Italy",
  "Luxembourg", "Malta", "Netherlands", "Portugal", "Spain", "Sweden"
)
east_countries <- c(
  "Austria", "Bulgaria", "Croatia", "Cyprus", "Czech Republic", "Estonia",
  "Greece", "Hungary", "Latvia", "Lithuania", "Poland", "Romania",
  "Slovakia", "Slovenia"
)

c$eastwest <- NA
c$eastwest[c$country %in% west_countries] <- "West"
c$eastwest[c$country %in% east_countries] <- "East"

## Left-Right Ideology
# Scores below 3.9 are labelled left-leaning and scores above 6.9 right-leaning.
# Everything else (including missing scores) stays NA: centrists are kept out
# of the plot for better visibility of left and right.
# To include them, add: c$eu_lrgen > 3.9 & c$eu_lrgen < 6.9 ~ "Centrist"
c$lr <- case_when(
  c$eu_lrgen < 3.9 ~ "Left-leaning",
  c$eu_lrgen > 6.9 ~ "Right-leaning"
)


## prevalence by day
# obs = number of rows sharing the same date. The grouping is dropped again
# right away so that `c` does not silently stay grouped for later steps.
c <- c %>%
  group_by(date) %>%
  mutate(obs = n()) %>%
  ungroup()

# Daily mean of the `immigration` column with a 95% normal-approximation
# interval; days with fewer than 10 rows are dropped.
# NOTE(review): `immigration` comes from a dfm of dictionary matches, so it
# looks like a per-tweet match count rather than a 0/1 indicator, while the
# interval uses the binomial formula for a proportion (it becomes NaN if a
# daily mean exceeds 1) -- confirm this is intended.
prev <- c %>%
  group_by(date, obs) %>%
  filter(n() > 9) %>%
  summarize(mean = mean(immigration, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    up = mean + 1.96 * sqrt((mean * (1 - mean)) / obs),
    low = mean - 1.96 * sqrt((mean * (1 - mean)) / obs)
  )

## sentiment by day
# Only rows with at least one immigration match are used, and only days that
# still have 10 or more such rows.
senti <- c %>%
  filter(immigration > 0) %>%
  group_by(date) %>%
  filter(n() >= 10) %>%
  summarise(mean = mean(sentiment, na.rm = TRUE)) %>%
  ungroup()


# prevalence by day, split by ideology and region
# Combined region/ideology label; rows missing either component stay NA.
c$both <- case_when(
  c$lr == "Left-leaning" & c$eastwest == "West" ~ "Western Left",
  c$lr == "Centrist" & c$eastwest == "West" ~ "Western Center",
  c$lr == "Right-leaning" & c$eastwest == "West" ~ "Western Right",
  c$lr == "Left-leaning" & c$eastwest == "East" ~ "Eastern Left",
  c$lr == "Centrist" & c$eastwest == "East" ~ "Eastern Center",
  c$lr == "Right-leaning" & c$eastwest == "East" ~ "Eastern Right"
)

  
# Daily mean of `immigration` per region/ideology group. Rows without a group
# label are dropped before aggregating.
prevboth <- c %>%
  filter(!is.na(both)) %>%
  group_by(date, both) %>%
  summarise(mean = mean(immigration, na.rm = TRUE)) %>%
  ungroup()

# Daily mean sentiment per group, restricted to rows with an immigration match.
sentiboth <- c %>%
  filter(immigration > 0, !is.na(both)) %>%
  group_by(date, both) %>%
  summarise(mean = mean(sentiment, na.rm = TRUE)) %>%
  ungroup()

# Keep only the days strictly between -6 and 6.
sentiboth2 <- filter(sentiboth, date > -6, date < 6)

# Define custom colors for the groups
custom_colors <- c(
  "Eastern Left" = "#CC0000",
  "Western Left" = "#FF6666",
  "Eastern Right" = "#004C99",
  "Western Right" = "#66B2FF"
)

# One sentiment line per group, with a dashed vertical marker at day 0.
sentbothplot <- ggplot(sentiboth2, aes(x = date, y = mean, color = both)) +
  geom_line(size = 2) +
  geom_vline(xintercept = 0, color = "black", linetype = "dashed", size = 1) +
  scale_x_continuous(breaks = seq(-5, 5, by = 1)) +
  scale_color_manual(values = custom_colors) +
  labs(x = "Days", y = "Sentiment", title = "Sentiment by Region and Ideology") +
  theme_light(base_size = 25)
sentbothplot

# Keep only the days strictly between -6 and 6.
prevboth2 <- filter(prevboth, date > -6, date < 6)

# One prevalence line per group; axis titles are intentionally left empty.
prevbothplot <- ggplot(prevboth2, aes(x = date, y = mean, color = both)) +
  geom_line(size = 2) +
  geom_vline(xintercept = 0, color = "black", linetype = "dashed", size = 1) +
  scale_x_continuous(breaks = seq(-5, 5, by = 1)) +
  scale_color_manual(values = custom_colors) +
  labs(x = "", y = "", title = "Prevalence by Region and Ideology") +
  theme_light(base_size = 25)
prevbothplot


# plots
# Daily prevalence (solid line) with its upper and lower interval bounds
# (dashed lines) and a dashed vertical marker at day 0.
dictplot <- ggplot(prev, aes(x = date, y = mean, group = 1)) +
  geom_path(size = 2, color = "slateblue4") +
  geom_path(aes(y = up), size = 1, linetype = "dashed") +
  geom_path(aes(y = low), size = 1, linetype = "dashed") +
  geom_vline(xintercept = 0, color = "black", linetype = "dashed", size = 1) +
  scale_x_continuous(breaks = seq(-15, 15, by = 5)) +
  labs(x = "Days", y = "Prevalence", title = "C: Twitter Dictionary") +
  theme_light(base_size = 25)
dictplot

# Ribbon bounds: the green band spans 0 up to positive means, the red band
# spans negative means up to 0.
senti <- senti %>%
  mutate(
    upper_green = pmax(mean, 0),
    lower_green = 0,
    upper_red = 0,
    lower_red = pmin(mean, 0)
  )

aplot <- ggplot(senti, aes(x = date, y = mean)) +
  geom_line(size = 2, color = "slateblue4") +
  geom_ribbon(aes(ymin = lower_green, ymax = upper_green),
              fill = "green", alpha = 0.5) +
  geom_ribbon(aes(ymin = lower_red, ymax = upper_red),
              fill = "red", alpha = 0.5) +
  geom_vline(xintercept = 0, color = "black", linetype = "dashed", size = 1) +
  scale_x_continuous(breaks = seq(-15, 15, by = 5)) +
  labs(x = "Days", y = "Sentiment", title = "A: Twitter Lexicoder") +
  theme_light(base_size = 25)
aplot


### sentiment analysis with VADER ### 
#c2 <- c %>%  # UNCOMMENT THIS IF YOU WANT TO RERUN VADER
#  filter(immigration > 0)
#c2$number <- 1:3265

#set.seed(2023)
#test <- vader_df(c2$text_translated)
#test$number <- 1:3265
#d <- full_join(c2, test, by = "number")
#save(d, file = "d.RData")
# Loads `d` (tweets joined with their VADER scores) from disk, which skips
# re-running the VADER sentiment analyzer.
load("d.RData")

## sentiment by day
# Mean VADER `compound` score per day, for days with 10 or more matching rows.
senti2 <- d %>%
  filter(immigration > 0) %>%
  group_by(date) %>%
  filter(n() >= 10) %>%
  summarise(mean = mean(compound, na.rm = TRUE)) %>%
  ungroup()


# plots
# Ribbon bounds: the green band spans 0 up to positive means, the red band
# spans negative means up to 0.
senti2 <- senti2 %>%
  mutate(
    upper_green = pmax(mean, 0),
    lower_green = 0,
    upper_red = 0,
    lower_red = pmin(mean, 0)
  )

bplot <- ggplot(senti2, aes(x = date, y = mean)) +
  geom_line(size = 2, color = "slateblue4") +
  geom_ribbon(aes(ymin = lower_green, ymax = upper_green),
              fill = "green", alpha = 0.5) +
  geom_ribbon(aes(ymin = lower_red, ymax = upper_red),
              fill = "red", alpha = 0.5) +
  geom_vline(xintercept = 0, color = "black", linetype = "dashed", size = 1) +
  scale_x_continuous(breaks = seq(-15, 15, by = 5)) +
  labs(x = "Days", y = "Sentiment", title = "B: Twitter VADER") +
  theme_light(base_size = 25)
bplot



## Factiva ##

# dictionary based sentiment analysis
length(data_dictionary_LSD2015)
data_dictionary_LSD2015

# Loads the `news` data frame.
load("news.RData")

# Articles without cleaned text cannot be tokenised, so drop them.
news <- news %>%
  drop_na(clean_text)

# Running document number, derived from the data rather than hard-coded
# (previously 1:495), so it stays valid if the number of articles changes.
news$docnum <- seq_len(nrow(news))

newscorp <- corpus(news, text_field = "clean_text")

# Tokenise without punctuation and numbers, then remove English stopwords.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
newstoks <- tokens(newscorp,
                   remove_punct = TRUE,
                   remove_numbers = TRUE) %>%
  tokens_remove(stopwords("en"))

# Additional words removed on top of the stopword list.
newstoks <- tokens_remove(
  newstoks,
  c("can", "now", "like", "just", "us", "yes", "something", "must")
)


news_dfm <- dfm(newstoks)
news_dfm


# Raw match counts for the first two Lexicoder keys (used further down as the
# `negative` and `positive` columns).
corp_sentiment <- dfm_group(news_dfm) %>%
  dfm_lookup(dictionary = data_dictionary_LSD2015[1:2])
corp_sentiment

# as a percentage
# Features are weighted to within-document proportions before the lookup.
corp_sentiment_prop <- dfm_group(news_dfm) %>%
  dfm_weight(scheme = "prop") %>%
  dfm_lookup(dictionary = data_dictionary_LSD2015[1:2])
corp_sentiment_prop


sent <- convert(corp_sentiment_prop, to = "data.frame")
# Number the rows from the data itself instead of hard-coding the corpus size
# (previously 1:495).
sent$docnum <- seq_len(nrow(sent))

## immigration dictionary (based on Ruedin & Morales 2017)

# Terms that mark a token as immigration-related. Entries containing `*` are
# wildcard patterns (presumably matched with quanteda's default glob matching).
immigration_terms <- c(
  "asylum", "border", "citizen*", "cultur*",
  "deport*", "ethnic*", "foreign*", "halal",
  "hallal", "identity", "immigr*", "integrat*",
  "irregular", "migrant*", "*migration*", "minorit*",
  "multicultur*", "naturalis*", "naturaliz*", "permit",
  "refug*", "religious", "reunion", "temple",
  "unauthorised", "unauthorized", "unity", "evac*",
  "flee*", "airport", "checkpoint", "escap*"
)

# Single-key dictionary: every match is reported under `immigration`.
immidict <- dictionary(list(immigration = immigration_terms))


# searching the tokens for matches with the dictionary
toks_dict <- tokens_lookup(newstoks, dictionary = immidict)
print(toks_dict)

# Document-feature matrix of the dictionary matches.
dfm_dict <- dfm(toks_dict)
dfm_dict

# Reuse dfm_dict instead of building the same dfm a second time.
dictdf <- convert(dfm_dict, to = "data.frame")
# Derive the running document number from the data rather than hard-coding the
# corpus size (previously 1:495).
dictdf$docnum <- seq_len(nrow(dictdf))


# Combine the articles with their dictionary matches and sentiment shares.
# Both joins use dplyr's default (all shared column names) as join keys.
e <- full_join(news, dictdf)
c3 <- e %>%
  full_join(sent) %>%
  # net sentiment: positive share minus negative share
  mutate(sentiment = positive - negative)

## sentiment by day
# Mean net sentiment per value of `tal` (presumably the day relative to the
# takeover, since it is plotted on the "Days" axis -- confirm).
senti3 <- c3 %>%
  group_by(tal) %>%
  summarise(mean = mean(sentiment, na.rm = TRUE)) %>%
  ungroup()

# plots
# Ribbon bounds: the green band spans 0 up to positive means, the red band
# spans negative means up to 0.
senti3 <- senti3 %>%
  mutate(
    upper_green = pmax(mean, 0),
    lower_green = 0,
    upper_red = 0,
    lower_red = pmin(mean, 0)
  )

cplot <- ggplot(senti3, aes(x = tal, y = mean)) +
  geom_line(size = 2, color = "slateblue4") +
  geom_ribbon(aes(ymin = lower_green, ymax = upper_green),
              fill = "green", alpha = 0.5) +
  geom_ribbon(aes(ymin = lower_red, ymax = upper_red),
              fill = "red", alpha = 0.5) +
  geom_vline(xintercept = 0, color = "black", linetype = "dashed", size = 1) +
  scale_x_continuous(breaks = seq(-15, 15, by = 5)) +
  labs(x = "Days", y = "Sentiment", title = "C: Factiva Lexicoder") +
  theme_light(base_size = 25)
cplot



### sentiment analysis with VADER ### 
#c3$text_trimmed <- substr(c3$clean_text, 1, 9999) # UNCOMMENT THIS IF YOU WANT TO RERUN VADER

#set.seed(2023)
#v <- as.data.frame(vader_df(c3$text_trimmed))

#v$docnum <- 1:495
#d3 <- full_join(c3, v, by = "docnum")
#save(d3, file = "d3.RData")
# Loads `d3` (articles joined with their VADER scores) from disk, which skips
# re-running the VADER sentiment analyzer.
load("d3.RData")

## sentiment by day
# Mean VADER `compound` score per value of `tal`.
senti4 <- d3 %>%
  group_by(tal) %>%
  summarise(mean = mean(compound, na.rm = TRUE)) %>%
  ungroup()

# plots
# Ribbon bounds: the green band spans 0 up to positive means, the red band
# spans negative means up to 0.
senti4 <- senti4 %>%
  mutate(
    upper_green = pmax(mean, 0),
    lower_green = 0,
    upper_red = 0,
    lower_red = pmin(mean, 0)
  )

dplot <- ggplot(senti4, aes(x = tal, y = mean)) +
  geom_line(size = 2, color = "slateblue4") +
  geom_ribbon(aes(ymin = lower_green, ymax = upper_green),
              fill = "green", alpha = 0.5) +
  geom_ribbon(aes(ymin = lower_red, ymax = upper_red),
              fill = "red", alpha = 0.5) +
  geom_vline(xintercept = 0, color = "black", linetype = "dashed", size = 1) +
  scale_x_continuous(breaks = seq(-15, 15, by = 5)) +
  labs(x = "Days", y = "Sentiment", title = "D: Factiva VADER") +
  theme_light(base_size = 25)
dplot

